package com.sicau.rag;

import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.stereotype.Component;

import java.util.List;

/**
 * Custom text splitter that breaks documents into token-based chunks.
 *
 * <p>Wraps a single {@link TokenTextSplitter} built once with the fixed
 * settings declared below, and is registered as a Spring component.
 */
@Slf4j
@Component
public class TokenCountTextSplitter {

    // NOTE(review): the constant names mirror the parameter order of Spring AI's
    // five-argument TokenTextSplitter constructor (chunkSize, minChunkSizeChars,
    // minChunkLengthToEmbed, maxNumChunks, keepSeparator) — confirm against the
    // Spring AI version on the classpath before tuning any of them.
    private static final int CHUNK_SIZE = 1000;
    private static final int MIN_CHUNK_SIZE_CHARS = 400;
    private static final int MIN_CHUNK_LENGTH_TO_EMBED = 10;
    private static final int MAX_NUM_CHUNKS = 5000;
    private static final boolean KEEP_SEPARATOR = true;

    /** Delegate that performs the actual splitting; configured once in the constructor. */
    private final TokenTextSplitter splitter;

    /**
     * Creates the splitter with the fixed configuration declared by this class's constants.
     */
    public TokenCountTextSplitter() {
        this.splitter = new TokenTextSplitter(
                CHUNK_SIZE,
                MIN_CHUNK_SIZE_CHARS,
                MIN_CHUNK_LENGTH_TO_EMBED,
                MAX_NUM_CHUNKS,
                KEEP_SEPARATOR);
    }

    /**
     * Splits the given documents into token-based chunks.
     *
     * @param documents the documents to split; {@code null} is treated as an empty list
     * @return the documents produced by the underlying {@link TokenTextSplitter}
     */
    public List<Document> split(List<Document> documents) {
        log.info("文档分割");
        if (documents == null) {
            // Route the "nothing to split" case through the splitter as an empty list so
            // callers receive the same kind of result as on the normal path instead of
            // an exception raised from inside the delegate.
            return splitter.apply(List.of());
        }
        return splitter.apply(documents);
    }
}
